TO DO LIST

library(ggplot2)
library(tidyverse)
── Attaching core tidyverse packages ─────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ lubridate 1.9.2     ✔ tibble    3.2.1
✔ purrr     1.0.1     ✔ tidyr     1.3.0
✔ readr     2.1.4
── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(dplyr)

Explore columns

# Print, for every column of `tracks`, a "<position>----<name>" header
# followed by up to the first ten values of that column.
# seq_len() yields an empty sequence for a zero-column frame (1:0 would not),
# and head() stops at the last row instead of padding with NA when `tracks`
# has fewer than ten rows.
for (col_idx in seq_len(ncol(tracks))) {
  print(paste0(col_idx, "----", colnames(tracks)[col_idx]))
  print(head(tracks[[col_idx]], 10))
}
[1] "1----artists"
 [1] "Nayt"                      "Mahmood"                  
 [3] "NASKA"                     "NASKA"                    
 [5] "Bresh"                     "Marracash"                
 [7] "Rino Gaetano"              "NASKA"                    
 [9] "Pinguini Tattici Nucleari" "Marracash"                
[1] "2----album_name"
 [1] "Un bacio (Deluxe Edition)" "Brividi"                  
 [3] "REBEL"                     "REBEL"                    
 [5] "ORO BLU"                   "NOI, LORO, GLI ALTRI"     
 [7] "Q Concert"                 "REBEL (Deluxe)"           
 [9] "Giovani Wannabe"           "NOI, LORO, GLI ALTRI"     
[1] "3----year"
 [1] 2016 2022 2022 2022 2022 2021 1981 2022 2022 2021
[1] "4----season"
 [1] "Spring"  "Winter"  "Spring"  "Spring"  "Spring"  "Autumn"  "missing"
 [8] "Autumn"  "Spring"  "Autumn" 
[1] "5----popularity"
 [1] 32 65 53 46 59 64 46 55 63 66
[1] "6----acousticness"
 [1] 0.50300 0.44800 0.02820 0.00162 0.36900 0.48500 0.05690 0.01360
 [9] 0.04090 0.57000
[1] "7----danceability"
 [1] 0.792 0.523 0.455 0.594 0.489 0.614 0.566 0.405 0.739 0.727
[1] "8----energy"
 [1] 0.631 0.614 0.677 0.920 0.367 0.464 0.376 0.616 0.810 0.698
[1] "9----instrumentalness"
 [1] 0 0 0 0 0 0 0 0 0 0
[1] "10----liveness"
 [1] 0.1420 0.2540 0.5640 0.1590 0.1980 0.0812 0.4740 0.1090 0.1220 0.1100
[1] "11----loudness"
 [1] -11.908  -4.435  -5.041  -5.566 -10.254  -8.473 -11.643  -6.442
 [9]  -5.317  -9.676
[1] "12----speechiness"
 [1] 0.0992 0.0347 0.0338 0.0519 0.0878 0.0803 0.0443 0.0347 0.0311 0.3650
[1] "13----tempo"
 [1] 108.076 122.962 179.981 139.929 131.302 152.092 176.366 160.219
 [9] 127.972  92.915
[1] "14----key"
 [1] 10  7  4  2  0  9  0  1 11  8
[1] "15----mode"
 [1] 1 1 1 1 1 1 1 1 0 1
[1] "16----duration_ms"
 [1] 141111 199146 219846 170388 231602 195799 217629 179815 212966 234446
[1] "17----valence"
 [1] 0.805 0.342 0.266 0.492 0.524 0.293 0.486 0.199 0.948 0.356
[1] "18----time_signature"
 [1] 4 4 4 4 4 4 4 3 4 4
[1] "19----track.id"
 [1] "3o5AiG9Omh5GWlBNKEVcA9" "1ZMGp9MTXbtAPvcKa0U3zS"
 [3] "6fPGBlx8wsAxhoDn7BwiAH" "75U0n5xhZT3al2oC3I61rG"
 [5] "6I28wnb48iMVVfyTSf4lkx" "0WgVvy1KelQxG6KBUukTWI"
 [7] "5mA4wMDUbf9A2N0vzCR80R" "0jkBVkeS6L5NtvPn29NeIK"
 [9] "7iLuBTHJSXM2HalKHFqEEy" "4gxRyOZefp95AXZFaztdtO"
[1] "20----genre_1"
 [1] "italian hip hop"   "italian adult pop" "emo rap italiano" 
 [4] "emo rap italiano"  "italian hip hop"   "italian hip hop"  
 [7] "canzone d'autore"  "emo rap italiano"  "bergamo indie"    
[10] "italian hip hop"  
[1] "21----genre_2"
 [1] "italian" "italian" "italian" "italian" "italian" "italian" NA       
 [8] "italian" NA        "italian"

Ranges

# Report the observed minimum and maximum of the columns at positions 5
# through 18 of `tracks`. Each printed vector holds the column position, the
# column name and the two range endpoints; c() coerces all of them to strings.
for (col_idx in 5:18) {
  col_name <- colnames(tracks)[col_idx]
  col_range <- range(tracks[, col_idx])
  print(c(col_idx, col_name, col_range))
}
[1] "5"          "popularity" "19"         "95"        
[1] "6"            "acousticness" "0.000519"     "0.921"       
[1] "7"            "danceability" "0.352"        "0.877"       
[1] "8"      "energy" "0.228"  "0.968" 
[1] "9"                "instrumentalness" "0"               
[4] "0.014"           
[1] "10"       "liveness" "0.0344"   "0.667"   
[1] "11"       "loudness" "-14.291"  "-2.363"  
[1] "12"          "speechiness" "0.0261"      "0.365"      
[1] "13"      "tempo"   "74.836"  "197.773"
[1] "14"  "key" "0"   "11" 
[1] "15"   "mode" "0"    "1"   
[1] "16"          "duration_ms" "137562"      "326893"     
[1] "17"      "valence" "0.0397"  "0.948"  
[1] "18"             "time_signature" "3"              "5"             
# Column positions selected as numerical: 5 through 15, plus 17 and 18
# (position 16 is left out of this selection).
numerical_values <- c(5:15, 17, 18)
# Show how many positions were selected.
length(numerical_values)
[1] 13

Distributions

# Draw one histogram per column of `tracks` at positions 5 through 18.
# NOTE: this reassigns `numerical_values`, replacing any earlier definition.
numerical_values <- 5:18
for (col in colnames(tracks)[numerical_values]) {
  # The bin count is fixed at 30 for every column. The previous per-column
  # `bins` value (max - min of the column) was computed but never used, so it
  # has been removed.
  hist_plot <- ggplot(data = tracks, mapping = aes(x = .data[[col]])) +
    geom_histogram(bins = 30, fill = "blue", color = "black", alpha = 0.7) +
    labs(title = col, x = "", y = "Frequency") +
    theme_bw()

  # Inside a loop a ggplot object is only drawn when printed explicitly.
  # The object is named `hist_plot` rather than `plot` to avoid masking
  # base::plot.
  print(hist_plot)
}

# Tag every data frame in `dataset_list` with where it came from:
#   id    - the name of the list element the frame was stored under
#   state - the "In.che.stato.vivi." value from row `idx` of `dataset_survey`
# NOTE(review): this pairs list element `idx` with survey row `idx` purely by
# position; confirm both objects are in the same order.
df_list_with_names <- lapply(seq_along(dataset_list), function(idx) {
  current_df <- dataset_list[[idx]]
  current_df$id <- names(dataset_list)[idx]
  # Further survey columns can be attached here in the same way, e.g.
  # current_df$study <- dataset_survey[idx, "Che.cosa.studi."]
  current_df$state <- dataset_survey[idx, "In.che.stato.vivi."]
  current_df
})

# Stack the tagged data frames into one data frame.
df_all_in_one <- df_list_with_names %>%
  bind_rows()

# Preview: every 50th row among the first 1000 (rows 1, 51, ..., 951).
print(df_all_in_one[seq(from = 1, to = 1000, by = 50), ])

# Discard the first column, then count the occurrences of each `genre_2`
# value.
df_all_in_one <- df_all_in_one[, -1]
table(df_all_in_one$genre_2)

alternative    brighton   classical         hip       house       indie     italian 
         45           1          57         142          23          88        2149 
   italiana    italiano     missing         pop        rock 
         53          98          15         813         173 
# Scatter plot of every unordered pair of the selected columns of
# `df_all_in_one`, with points coloured by `state` and the legend hidden.

# Drop positions 9 and 10 from the selection (instrumentalness and liveness in
# `tracks`; presumably the same positions in `df_all_in_one` -- TODO confirm).
# Logical negation is used instead of `-which(...)`, which would return an
# empty vector if neither position were present.
numerical_values_new <- numerical_values[!(numerical_values %in% c(9, 10))]
columns <- colnames(df_all_in_one)[numerical_values_new]
# columns <- colnames(tracks)[7:10]

n_columns <- length(columns)
# seq_len() gives an empty sequence when fewer than two columns are selected,
# whereas `1:(n - 1)` would count downwards and index out of range.
for (i in seq_len(max(n_columns - 1, 0))) {
  for (j in (i + 1):n_columns) {
    x_col <- columns[i]
    y_col <- columns[j]
    # Map the columns through the `.data` pronoun so the aesthetics are looked
    # up in the plot's own data (this also works if the data is a tibble).
    scatter_plot <- ggplot(
      df_all_in_one,
      aes(x = .data[[x_col]], y = .data[[y_col]], colour = state)
    ) +
      geom_point() +
      theme_bw() +
      theme(legend.position = "none") +
      labs(x = x_col, y = y_col)
    # Explicit print is required for the plot to be drawn inside a loop.
    print(scatter_plot)
  }
}

Blueprint for plots

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIFRPIERPIExJU1QNCi0gcmlkdWNlIHRoZSBudW1iZXIgb2YgZ2VucmVzLT4gcHJvZHVjZSBhIGxpc3QNCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkobHVicmlkYXRlKQ0KbGlicmFyeShkcGx5cikNCmBgYA0KDQpgYGB7cn0NCmRhdGFzZXRfbGlzdCA9IHJlYWRSRFMoIi4uLy4uL2RhdGEvZGF0YV9nZW5yZXNfY2xlYW5lZC5SRGF0YSIpDQp0cmFja3MgPSBkYXRhc2V0X2xpc3RbWzFdXQ0KZGltKHRyYWNrcykNCmhlYWQodHJhY2tzKQ0KdHJhY2tzID0gdHJhY2tzWywtMV0NCmBgYA0KDQoNCiMgRXhwbG9yZSBjb2x1bW5zDQpgYGB7cn0NCmZvciAoaSBpbiAxOmRpbSh0cmFja3MpWzJdKXsNCiAgcHJpbnQocGFzdGUwKGksIi0tLS0iLGNvbG5hbWVzKHRyYWNrcylbaV0pKQ0KICBwcmludCh0cmFja3NbMToxMCxpXSkNCn0NCmBgYA0KIyBSYW5nZXMNCmBgYHtyfQ0KbnVtZXJpY2FsX3ZhbHVlcyA9IDU6MTgNCmZvciAoaSBpbiBudW1lcmljYWxfdmFsdWVzKXsNCiAgcHJpbnQoYyhpLGNvbG5hbWVzKHRyYWNrcylbaV0scmFuZ2UodHJhY2tzWyxpXSkpKQ0KfQ0KbGVuZ3RoKG51bWVyaWNhbF92YWx1ZXMpDQpgYGANCg0KDQojIGRpc3RyaWJ1dGlvbnMNCmBgYHtyfQ0KDQpmb3IoY29sIGluIGNvbG5hbWVzKHRyYWNrcylbbnVtZXJpY2FsX3ZhbHVlc10pew0KICBiaW5zID0gcmFuZ2UodHJhY2tzW1tjb2xdXSlbMl0tcmFuZ2UodHJhY2tzW1tjb2xdXSlbMV0NCiAgcGxvdCA8LSBnZ3Bsb3QoZGF0YSA9IHRyYWNrcywgbWFwcGluZyA9IGFlcyh4ID0uZGF0YVtbY29sXV0pKSArDQogIGdlb21faGlzdG9ncmFtKGJpbnMgPTMwLCBmaWxsID0gImJsdWUiLCBjb2xvciA9ICJibGFjayIsIGFscGhhID0gMC43KSArDQogIGxhYnModGl0bGUgPSBjb2wsIHggPSAiIiwgeSA9ICJGcmVxdWVuY3kiKSArDQogIHRoZW1lX2J3KCkNCiAgDQogIHByaW50KHBsb3QpDQp9DQoNCmBgYA0KDQoNCg0KDQpgYGB7cn0NCiMgQWRkIGEgY29sdW1uIGluZGljYXRpbmcgdGhlIHNvdXJjZSBkYXRhIGZyYW1lDQpkZl9saXN0X3dpdGhfbmFtZXMgPC0gbGFwcGx5KHNlcV9hbG9uZyhkYXRhc2V0X2xpc3QpLCBmdW5jdGlvbihpKSB7DQogIGRhdGFzZXRfbGlzdFtbaV1dJGlkIDwtIG5hbWVzKGRhdGFzZXRfbGlzdClbaV0NCiAgIyBoZXJlIHlvdSBjYW4gYWRkIGFsbCBraW5kIG9mIGNvbHVtbnMgZnJvbSB0aGUgc3VydmV5IGRhdGFzZXQNCiAgI2RhdGFzZXRfbGlzdFtbaV1dJHN0dWR5IDwtIGRhdGFzZXRfc3VydmV5W2ksIkNoZS5jb3NhLnN0dWRpLiJdDQogIGRhdGFzZXRfbGlzdFtbaV1dJHN0YXRlIDwtIGRhdGFzZXRfc3VydmV5W2ksIkluLmNoZS5zdGF0by52aXZpLiJdDQogIHJldHVybihkYXRhc2V0X2xpc3RbW2ldXSkNCn0pDQoNCiMgQ29tYmluZSB0aGUgZGF0YSBmcmFtZXMgaW50byBhIHNpbmdsZSBkYXRh
IGZyYW1lDQpkZl9hbGxfaW5fb25lIDwtIGJpbmRfcm93cyhkZl9saXN0X3dpdGhfbmFtZXMpDQoNCiMgUHJpbnQgdGhlIHJlc3VsdA0KcHJpbnQoZGZfYWxsX2luX29uZVtzZXEoMSwxMDAwLDUwKSxdKQ0KZGZfYWxsX2luX29uZT1kZl9hbGxfaW5fb25lWywtMV0NCmBgYA0KYGBge3J9DQp0YWJsZShkZl9hbGxfaW5fb25lJGdlbnJlXzIpDQpgYGANCg0KDQoNCg0KYGBge3J9DQpudW1lcmljYWxfdmFsdWVzX25ldyA9IG51bWVyaWNhbF92YWx1ZXNbLXdoaWNoKG51bWVyaWNhbF92YWx1ZXMlaW4lYyg5LDEwKSldDQpjb2x1bW5zID0gY29sbmFtZXMoZGZfYWxsX2luX29uZSlbbnVtZXJpY2FsX3ZhbHVlc19uZXddDQojY29sdW1ucyA9Y29sbmFtZXModHJhY2tzKVs3OjEwXQ0KZm9yKGkgaW4gMToobGVuZ3RoKGNvbHVtbnMpLTEpKXsNCiAgZm9yKGogaW4gKGkrMSk6bGVuZ3RoKGNvbHVtbnMpKXsNCiAgICBwbG90ID0gZ2dwbG90KGRmX2FsbF9pbl9vbmUsIGFlcyhkZl9hbGxfaW5fb25lWyxjb2x1bW5zW2ldXSxkZl9hbGxfaW5fb25lWyxjb2x1bW5zW2pdXSAsIGNvbG91ciA9IHN0YXRlICkpICsgDQogICAgICAgIGdlb21fcG9pbnQoKSsNCiAgICAgICAgdGhlbWVfYncoKSsNCiAgICAgICAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKSsNCiAgICAgICAgbGFicyh4ID1jb2x1bW5zW2ldLHkgPSBjb2x1bW5zW2pdICkNCiAgICBwcmludChwbG90KQ0KICB9DQp9DQpgYGANCg0KDQojIEJsdWVwcmludCBmb3IgcGxvdHMgDQpgYGB7cn0NCnBsb3QgPSBnZ3Bsb3QodHJhY2tzLCBhZXMoZW5lcmd5LGxvdWRuZXNzICwgY29sb3VyID0gZ2VucmVfMSApKSArIA0KICBnZW9tX3BvaW50KCkrDQogIHRoZW1lX2J3KCkrDQogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikNCg0KcHJpbnQocGxvdCkNCmBgYA0KDQoNCg0KDQoNCg0KDQoNCg0KDQo=